library(tidyverse)
library(colorout)
library(here)
library(ggplot2)
library(scales)
library(reticulate)
library(qvalue)
library(flextable)
library(officer)
library(ggvenn)
library(extrafont)
library(forcats)
library(ggrepel)
library(ggtext)
library(R.SamBada)
library(rgdal)
library(stats)
library(geosphere)
library(pcadapt)pcadapt performs principal component analysis and computes p-values to test for outliers. The test for outliers is based on the correlations between genetic variation and the first K principal components. pcadapt also handles Pool-seq data for which the statistical analysis is performed on the genetic markers frequencies. Returns an object of class pcadapt.
# Run pcadapt with a large number of PCs
pcadapt_res <- pcadapt(x, K = 30)
# Create a scree plot
plot(pcadapt_res, option = "screeplot")Choose PCs
# perform the analysis
pcadapt_res <- pcadapt(
x,
method = c("mahalanobis"),
min.maf = 0.1,
LD.clumping = NULL,
tol = 1e-04,
K = 2) # choose the right K for your data
str(pcadapt_res)## List of 11
## $ scores : num [1:60, 1:2] 0.0907 0.1033 0.1002 0.102 0.063 ...
## $ singular.values: num [1:2] 0.381 0.271
## $ loadings : num [1:41620, 1:2] -0.00725 NA 0.00481 0.00327 0.00398 ...
## $ zscores : num [1:41620, 1:2] -4.84 NA 2.6 1.95 2.12 ...
## $ af : num [1:41620] 0.879 0.973 0.723 0.858 0.617 ...
## $ maf : num [1:41620] 0.1207 0.0273 0.2768 0.1417 0.3833 ...
## $ chi2.stat : num [1:41620] 4.214 NA 1.094 0.981 0.467 ...
## $ stat : num [1:41620] 5.699 NA 1.479 1.327 0.632 ...
## $ gif : num 1.35
## $ pvalues : num [1:41620] 0.122 NA 0.579 0.612 0.792 ...
## $ pass : int [1:31945] 1 3 4 5 6 7 8 9 10 12 ...
## - attr(*, "K")= num 2
## - attr(*, "method")= chr "mahalanobis"
## - attr(*, "min.maf")= num 0.1
## - attr(*, "class")= chr "pcadapt"
# ____________________________________________________________________________
# import the bim file with the SNP data ####
snps <-
read_delim( # to learn about the options use here, run ?read_delim on the console.
here(
"output", "outflank", "autogenous.bim"
), # use library here to load it
col_names = FALSE, # we don't have header in the input file
show_col_types = FALSE, # suppress message from read_delim
col_types = "ccidcc" # set the class of each column
)
#
# set column names
colnames(
snps
) <- # to add a header in our tibble
c(
"Scaffold", "SNP", "Cm", "Position", "Allele1", "Allele2"
)
#
# check the tibble
head(snps)## # A tibble: 6 × 6
## Scaffold SNP Cm Position Allele1 Allele2
## <chr> <chr> <int> <dbl> <chr> <chr>
## 1 1 AX-581444870 0 97856 C T
## 2 1 AX-583035083 0 305518 A G
## 3 1 AX-583035102 0 308124 A G
## 4 1 AX-583033342 0 315059 C G
## 5 1 AX-583035163 0 315386 A G
## 6 1 AX-583035198 0 330908 G T
# Combine into a data frame
pcadapt_res_df <- data.frame(
SNP = snps$SNP,
Chromosome = snps$Scaffold,
Position = snps$Position,
pvalues = pcadapt_res$pvalues,
stat = pcadapt_res$stat
) |> drop_na()
head(pcadapt_res_df)## SNP Chromosome Position pvalues stat
## 1 AX-581444870 1 97856 0.1215846 5.6988143
## 2 AX-583035102 1 308124 0.5787388 1.4791127
## 3 AX-583033342 1 315059 0.6122633 1.3268178
## 4 AX-583035163 1 315386 0.7916782 0.6317767
## 5 AX-583035198 1 330908 0.6550057 1.1443128
## 6 AX-583035257 1 442875 0.2441440 3.8133644
Adjust
# Adjust the p-values using Benjamini-Hochberg method
padj <- p.adjust(pcadapt_res_df$pvalues,method="BH")
# Define significance level
alpha <- 0.05
# Get indices of significant SNPs
outliers_indices <- which(padj < alpha)
# Get the SNP IDs of the outliers
outlier_snps <- pcadapt_res_df$SNP[outliers_indices]
# Print the SNP IDs
length(outlier_snps)## [1] 42
# Save it
write.table(
outlier_snps,
file = here("output", "pcadapt","SNPs_pcadapt.txt"),
row.names = FALSE,
quote = FALSE,
col.names = FALSE,
sep = "\n"
)# Set colors for the chromosomes
color_vector <- c("#CCF6D6", "#F6E1CC", "#CCD8F6") # Add or remove colors as needed.
names(color_vector) <- unique(pcadapt_res_df$Chromosome) # Make sure unique(df_sub$Chromosome) gives all unique Chromosome values.
# Adjust the p-values using Benjamini-Hochberg method
padj <- p.adjust(pcadapt_res_df$pvalues,method="BH")
# Define significance level
alpha <- 0.05
# Get indices of significant SNPs
outliers_indices <- which(padj < alpha)
# Create a new 'highlight' column, initially set to FALSE
pcadapt_res_df$highlight <- FALSE
# Update the 'highlight' column to highlight these outlier SNPs
pcadapt_res_df$highlight[outliers_indices] <- TRUE
# Identify the SNP with the smallest p-value in each chromosome among the associated SNPs
min_pval_snps <- pcadapt_res_df |>
filter(highlight) |>
group_by(Chromosome) |>
slice(which.min(pvalues))
# Custom label function
k_format <- function(x) {
paste0(format(x / 1e6, big.mark = "", scientific = FALSE), "k")
}
# source the plotting function
source(
here(
"scripts", "analysis", "my_theme2.R"
)
)
# Create the plot
ggplot(pcadapt_res_df, aes(x = Position, y = -log10(pvalues))) +
geom_point(aes(color = Chromosome),
data = subset(pcadapt_res_df, highlight == FALSE),
size = .5) +
geom_point(
color = "magenta",
data = subset(pcadapt_res_df, highlight == TRUE),
size = .5
) +
scale_color_manual(values = color_vector, guide = "none") +
labs(title = "pcadapt Brazil", x = "Position", y = "-log10(p-value)", caption = "SNPs highlighted in magenta are significant after Benjamini-Hochberg adjustment (p<0.05).") +
facet_wrap(~ Chromosome, scales = "free_x") +
scale_x_continuous(labels = k_format) +
my_theme() +
theme(panel.spacing = unit(.2, "lines"),
plot.margin = unit(c(1, 1, 2, 2), "lines"),
plot.caption = element_markdown(face = "italic", color = "#574E4E")) +
# Annotate only the SNP with the smallest p-value for each chromosome among the associated SNPs
geom_text_repel(
data = min_pval_snps,
aes(label = SNP),
size = 3,
nudge_y = 0.01,
segment.color = NA,
max.overlaps = Inf # Set to 'Inf' to show all annotations
)# Save the plot
ggsave(
here(
"output", "pcadapt", "figures", "pcadapt.pdf"
),
width = 8,
height = 5,
units = "in"
)Choosing a cutoff for outlier detection
q-values
qval <- qvalue(pcadapt_res$pvalues)$qvalues
alpha <- 0.05
outliers <- which(qval < alpha)
length(outliers)## [1] 43
Benjamini-Hochberg Procedure
padj <- p.adjust(pcadapt_res$pvalues,method="BH")
alpha <- 0.05
outliers <- which(padj < alpha)
length(outliers)## [1] 42
Bonferroni correction
padj <- p.adjust(pcadapt_res$pvalues,method="bonferroni")
alpha <- 0.05
outliers <- which(padj < alpha)
length(outliers)## [1] 13
Loadings
par(mfrow = c(2, 2))
for (i in 1:2)
plot(pcadapt_res$loadings[, i], pch = 19, cex = .3, ylab = paste0("Loadings PC", i))# Read data from txt files
outflank_snps <-
read.table(
here("output", "outflank", "SNPs_outFlank.txt"),
stringsAsFactors = FALSE
) |>
drop_na()
pcadapt_snps <-
read.table(
here("output", "pcadapt", "SNPs_pcadapt.txt"),
stringsAsFactors = FALSE
)
# Create a list with all dataframes
list_of_clusters <- list("outFlank" = outflank_snps$V1, "pcadapt" = pcadapt_snps$V1)
# Create Venn diagram
venn_diagram <- ggvenn(list_of_clusters, fill_color = c("steelblue", "darkorange"))
print(venn_diagram)# Find common SNPs
common_SNPs <- Reduce(intersect, list_of_clusters)
# Save the shared SNP ids into a txt file
write.table(
common_SNPs,
file = here("output", "pcadapt", "common_SNPs_pcadapt_outflank.txt"),
row.names = FALSE,
col.names = FALSE,
quote = FALSE
)
# # Save Venn diagram to PDF
output_path <- here("output", "pcadapt", "figures", "significant_snps.pdf")
ggsave(output_path, venn_diagram, height = 5, width = 5, dpi = 300)With python Clean env
Venn
import pandas as pd
from matplotlib_venn import venn2
import matplotlib.pyplot as plt
import gc
# File paths
path_outflank = 'output/outflank/SNPs_outFlank.txt'
path_pcadapt = 'output/pcadapt/SNPs_pcadapt.txt'
pdf_output_path = 'output/pcadapt/venn_3_pop_pcadapt_outflank.pdf' # PDF output file path
# Read the data (assuming the SNP IDs are in the first column)
outflank_snps = pd.read_csv(path_outflank, sep="\t", header=None)
pcadapt_snps = pd.read_csv(path_pcadapt, sep="\t", header=None)
# Drop NA values if any
outflank_snps.dropna(inplace=True)
pcadapt_snps.dropna(inplace=True)
# Create sets from the first column of each dataframe
set_outflank = set(outflank_snps[0])
set_pcadapt = set(pcadapt_snps[0])
# Create a Venn diagram
venn2([set_outflank, set_pcadapt], ('outFlank', 'pcadapt'), set_colors=('blue', 'gray'))## <matplotlib_venn._common.VennDiagram object at 0x7fcda1e33700>

# Clearing the environment
del path_outflank, path_pcadapt, pdf_output_path, outflank_snps, pcadapt_snps, set_outflank, set_pcadapt
plt.clf() # Clear the current figure
plt.cla() # Clear the current axes
# Garbage collection
gc.collect()## 396

Create plot with the shared SNPs
# Filter the dataframe
filtered_df <- pcadapt_res_df |> filter(SNP %in% common_SNPs)
# Create the plot
ggplot(pcadapt_res_df, aes(x = Position, y = -log10(pvalues))) +
geom_point(
aes(color = Chromosome),
data = subset(pcadapt_res_df, highlight == FALSE),
size = .3
) +
geom_point(
color = "magenta",
data = subset(filtered_df, highlight == TRUE),
size = .3
) +
scale_color_manual(values = color_vector, guide = "none") +
labs(x = "Position", y = "-log10(p-value)", caption = "SNPs highlighted in magenta are significant after \n Benjamini-Hochberg adjustment (p<0.001) with pcadapt, \nand were identified as outliers with outFlank") +
facet_wrap( ~ Chromosome, scales = "free_x") +
scale_x_continuous(labels = k_format) +
my_theme() +
theme(
panel.spacing = unit(.2, "lines"),
plot.margin = unit(c(1, 1, 2, 2), "lines"),
plot.caption = element_text(face = "italic")
) +
# Annotate only the SNP with the smallest p-value for each chromosome among the associated SNPs
geom_text_repel(
data = min_pval_snps,
aes(label = SNP),
size = 3,
nudge_y = 0.01,
segment.color = NA,
max.overlaps = Inf # Set to 'Inf' to show all annotations
)# Save the plot
ggsave(
here(
"output", "pcadapt", "figures", "pcadapt_outflank.pdf"
),
width = 8,
height = 5,
units = "in"
)We can create windows and count how many highlighted SNPs we have in each window. Then we can focus in genomic areas with several SNPs
cluster_df <- filtered_df %>%
mutate(Window_Start = floor(Position / 10000000) * 10000000) %>%
group_by(Chromosome, Window_Start) %>%
summarise(SNP_count = n(), .groups = 'drop')
# Create the plot
ggplot(pcadapt_res_df, aes(x = Position, y = -log10(pvalues))) +
geom_point(
aes(color = Chromosome),
data = subset(pcadapt_res_df, highlight == FALSE),
size = .3
) +
geom_point(
color = "magenta",
data = subset(filtered_df, highlight == TRUE),
size = .3
) +
geom_vline(
data = cluster_df,
aes(xintercept = Window_Start + 5000000),
color = "lightgray",
linetype = "dashed",
linewidth = 0.5
) +
geom_text_repel(
data = cluster_df,
aes(x = Window_Start + 5000000, y = 15, label = SNP_count),
vjust = -1,
hjust = -0.5
) +
scale_color_manual(values = color_vector, guide = "none") +
labs(x = "Position", y = "-log10(p-value)", caption = "SNPs highlighted in magenta are significant after \n Benjamini-Hochberg adjustment (p<0.001) with pcadapt, \nand were identified as outliers with outFlank \n Gray bars represent 10Mb windows and \n the number of SNPs per window is annotated in black.") +
facet_wrap( ~ Chromosome, scales = "free_x") +
scale_x_continuous(labels = k_format) +
my_theme() +
theme(
panel.spacing = unit(.2, "lines"),
plot.margin = unit(c(1, 1, 2, 2), "lines"),
plot.caption = element_text(face = "italic"),
legend.position = "none" # Remove legend
) +
geom_text_repel(
data = min_pval_snps,
aes(label = SNP),
size = 3,
nudge_y = 0.01,
segment.color = NA,
max.overlaps = Inf # Set to 'Inf' to show all annotations
)Check the SNP ids for each cluster
# Create a new dataframe to capture SNPs within each 100kb window for each chromosome
cluster_snps_df <- filtered_df %>%
mutate(Window_Start = floor(Position / 10000000) * 10000000) %>%
group_by(Chromosome, Window_Start) %>%
summarise(SNPs = list(SNP), .groups = 'drop')
# Unnest the SNPs column to show each SNP on its own row
unnested_cluster_snps_df <- cluster_snps_df %>%
unnest(SNPs)
# View the unnested data
head(unnested_cluster_snps_df)## # A tibble: 6 × 3
## Chromosome Window_Start SNPs
## <chr> <dbl> <chr>
## 1 1 140000000 AX-583515734
## 2 2 30000000 AX-584444558
## 3 2 50000000 AX-584502184
## 4 2 180000000 AX-585196879
## 5 2 320000000 AX-584907866
## 6 2 380000000 AX-579548089
# Convert the list column to a string column
cluster_snps_df_str <- cluster_snps_df %>%
mutate(SNPs = sapply(SNPs, function(x) paste(x, collapse = ", ")))
# Create the flextable
my_flextable <- flextable(cluster_snps_df_str, cwidth = 2.75)
my_flextable <- autofit(my_flextable)
# Display the flextable
my_flextableChromosome | Window_Start | SNPs |
|---|---|---|
1 | 140,000,000 | AX-583515734 |
2 | 30,000,000 | AX-584444558 |
2 | 50,000,000 | AX-584502184 |
2 | 180,000,000 | AX-585196879 |
2 | 320,000,000 | AX-584907866 |
2 | 380,000,000 | AX-579548089 |
2 | 390,000,000 | AX-579560686, AX-579584369, AX-579583040, AX-579584883 |
2 | 400,000,000 | AX-579604213, AX-579607140, AX-579619995 |
2 | 410,000,000 | AX-579630465, AX-579632196, AX-579632983, AX-579632927, AX-579661979, AX-579667335 |
2 | 510,000,000 | AX-579987164 |
2 | 560,000,000 | AX-580192850, AX-580195612 |
3 | 0 | AX-580344997 |
3 | 180,000,000 | AX-581275893 |
3 | 190,000,000 | AX-581302901 |
3 | 210,000,000 | AX-581437212, AX-581438408, AX-581442470, AX-581461793, AX-581462648, AX-581467653 |
3 | 220,000,000 | AX-581504582, AX-581512892 |
3 | 230,000,000 | AX-581527485, AX-581534474 |
pcadapt performs principal component analysis and computes p-values to test for outliers. The test for outliers is based on the correlations between genetic variation and the first K principal components. pcadapt also handles Pool-seq data for which the statistical analysis is performed on the genetic markers frequencies. Returns an object of class pcadapt.
Clean env and memory
# Remove all objects from the environment
rm(list = ls())
# Run the garbage collector to free up memory
gc()## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 3134638 167.5 4677312 249.8 NA 4677312 249.8
## Vcells 6506499 49.7 17479945 133.4 32768 17479945 133.4
# Run pcadapt with a large number of PCs
pcadapt_res <- pcadapt(x, K = 30)
# Create a scree plot
plot(pcadapt_res, option = "screeplot")Choose PCs
# perform the analysis
pcadapt_res <- pcadapt(
x,
method = c("mahalanobis"),
min.maf = 0.1,
LD.clumping = NULL,
tol = 1e-04,
K = 2) # choose the right K for your data
str(pcadapt_res)## List of 11
## $ scores : num [1:38, 1:2] -0.258 -0.295 -0.291 -0.275 -0.191 ...
## $ singular.values: num [1:2] 0.386 0.326
## $ loadings : num [1:41100, 1:2] NA NA -0.00249 -0.00146 -0.00379 ...
## $ zscores : num [1:41100, 1:2] NA NA -1.11 -0.665 -1.531 ...
## $ af : num [1:41100] 0.958 0.973 0.667 0.829 0.566 ...
## $ maf : num [1:41100] 0.0417 0.027 0.3333 0.1711 0.4342 ...
## $ chi2.stat : num [1:41100] NA NA 0.151 0.311 0.824 ...
## $ stat : num [1:41100] NA NA 0.211 0.435 1.154 ...
## $ gif : num 1.4
## $ pvalues : num [1:41100] NA NA 0.927 0.856 0.662 ...
## $ pass : int [1:29986] 3 4 5 6 7 8 9 10 12 14 ...
## - attr(*, "K")= num 2
## - attr(*, "method")= chr "mahalanobis"
## - attr(*, "min.maf")= num 0.1
## - attr(*, "class")= chr "pcadapt"
# ____________________________________________________________________________
# import the bim file with the SNP data ####
snps <-
read_delim( # to learn about the options use here, run ?read_delim on the console.
here(
"output", "outflank", "man_aut.bim"
), # use library here to load it
col_names = FALSE, # we don't have header in the input file
show_col_types = FALSE, # suppress message from read_delim
col_types = "ccidcc" # set the class of each column
)
#
# set column names
colnames(
snps
) <- # to add a header in our tibble
c(
"Scaffold", "SNP", "Cm", "Position", "Allele1", "Allele2"
)
#
# check the tibble
head(snps)## # A tibble: 6 × 6
## Scaffold SNP Cm Position Allele1 Allele2
## <chr> <chr> <int> <dbl> <chr> <chr>
## 1 1 AX-581444870 0 97856 C T
## 2 1 AX-583035083 0 305518 A G
## 3 1 AX-583035102 0 308124 A G
## 4 1 AX-583033342 0 315059 C G
## 5 1 AX-583035163 0 315386 A G
## 6 1 AX-583035198 0 330908 G T
# Combine into a data frame
pcadapt_res_df <- data.frame(
SNP = snps$SNP,
Chromosome = snps$Scaffold,
Position = snps$Position,
pvalues = pcadapt_res$pvalues,
stat = pcadapt_res$stat
) |> drop_na()
head(pcadapt_res_df)## SNP Chromosome Position pvalues stat
## 1 AX-583035102 1 308124 0.9273580 0.2113363
## 2 AX-583033342 1 315059 0.8561877 0.4350990
## 3 AX-583035163 1 315386 0.6623355 1.1544955
## 4 AX-583035198 1 330908 0.8934176 0.3158213
## 5 AX-583035257 1 442875 0.2274103 4.1501875
## 6 AX-583033504 1 492687 0.4437341 2.2769426
Adjust
# Adjust the p-values using Benjamini-Hochberg method
padj <- p.adjust(pcadapt_res_df$pvalues,method="BH")
# Define significance level
alpha <- 0.05
# Get indices of significant SNPs
outliers_indices <- which(padj < alpha)
# Get the SNP IDs of the outliers
outlier_snps <- pcadapt_res_df$SNP[outliers_indices]
# Print the SNP IDs
length(outlier_snps)## [1] 111
# Save it
write.table(
outlier_snps,
file = here("output", "pcadapt","man_aut_SNPs_pcadapt.txt"),
row.names = FALSE,
quote = FALSE,
col.names = FALSE,
sep = "\n"
)# Set colors for the chromosomes
color_vector <- c("#CCF6D6", "#F6E1CC", "#CCD8F6") # Add or remove colors as needed.
names(color_vector) <- unique(pcadapt_res_df$Chromosome) # Make sure unique(df_sub$Chromosome) gives all unique Chromosome values.
# Adjust the p-values using Benjamini-Hochberg method
padj <- p.adjust(pcadapt_res_df$pvalues,method="BH")
# Define significance level
alpha <- 0.05
# Get indices of significant SNPs
outliers_indices <- which(padj < alpha)
# Create a new 'highlight' column, initially set to FALSE
pcadapt_res_df$highlight <- FALSE
# Update the 'highlight' column to highlight these outlier SNPs
pcadapt_res_df$highlight[outliers_indices] <- TRUE
# Identify the SNP with the smallest p-value in each chromosome among the associated SNPs
min_pval_snps <- pcadapt_res_df |>
filter(highlight) |>
group_by(Chromosome) |>
slice(which.min(pvalues))
# Custom label function
k_format <- function(x) {
paste0(format(x / 1e6, big.mark = "", scientific = FALSE), "k")
}
# source the plotting function
source(
here(
"scripts", "analysis", "my_theme2.R"
)
)
# Create the plot
ggplot(pcadapt_res_df, aes(x = Position, y = -log10(pvalues))) +
geom_point(aes(color = Chromosome),
data = subset(pcadapt_res_df, highlight == FALSE),
size = .5) +
geom_point(
color = "magenta",
data = subset(pcadapt_res_df, highlight == TRUE),
size = .5
) +
scale_color_manual(values = color_vector, guide = "none") +
labs(title = "pcadapt Brazil", x = "Position", y = "-log10(p-value)", caption = "SNPs highlighted in magenta are significant after Benjamini-Hochberg adjustment (p<0.05).") +
facet_wrap(~ Chromosome, scales = "free_x") +
scale_x_continuous(labels = k_format) +
my_theme() +
theme(panel.spacing = unit(.2, "lines"),
plot.margin = unit(c(1, 1, 2, 2), "lines"),
plot.caption = element_markdown(face = "italic", color = "#574E4E")) +
# Annotate only the SNP with the smallest p-value for each chromosome among the associated SNPs
geom_text_repel(
data = min_pval_snps,
aes(label = SNP),
size = 3,
nudge_y = 0.01,
segment.color = NA,
max.overlaps = Inf # Set to 'Inf' to show all annotations
)# Save the plot
ggsave(
here(
"output", "pcadapt", "figures", "man_aut_pcadapt.pdf"
),
width = 8,
height = 5,
units = "in"
)Choosing a cutoff for outlier detection
q-values
qval <- qvalue(pcadapt_res$pvalues)$qvalues
alpha <- 0.05
outliers <- which(qval < alpha)
length(outliers)## [1] 111
Benjamini-Hochberg Procedure
padj <- p.adjust(pcadapt_res$pvalues,method="BH")
alpha <- 0.05
outliers <- which(padj < alpha)
length(outliers)## [1] 111
Bonferroni correction
padj <- p.adjust(pcadapt_res$pvalues,method="bonferroni")
alpha <- 0.05
outliers <- which(padj < alpha)
length(outliers)## [1] 34
Loadings
par(mfrow = c(2, 2))
for (i in 1:2)
plot(pcadapt_res$loadings[, i], pch = 19, cex = .3, ylab = paste0("Loadings PC", i))# Read data from txt files
outflank_snps <-
read.table(
here("output", "outflank", "man_aut_SNPs_outFlank.txt"),
stringsAsFactors = FALSE
) |>
drop_na()
pcadapt_snps <-
read.table(
here("output", "pcadapt", "man_aut_SNPs_pcadapt.txt"),
stringsAsFactors = FALSE
)
# Create a list with all dataframes
list_of_clusters <- list("outFlank" = outflank_snps$V1, "pcadapt" = pcadapt_snps$V1)
# Create Venn diagram
venn_diagram <- ggvenn(list_of_clusters, fill_color = c("steelblue", "darkorange"))
print(venn_diagram)# Find common SNPs
common_SNPs <- Reduce(intersect, list_of_clusters)
# Save the shared SNP ids into a txt file
write.table(
common_SNPs,
file = here("output", "pcadapt", "man_aut_common_SNPs_pcadapt_outflank.txt"),
row.names = FALSE,
col.names = FALSE,
quote = FALSE
)
# # Save Venn diagram to PDF
output_path <- here("output", "pcadapt", "figures", "man_aut_significant_snps.pdf")
ggsave(output_path, venn_diagram, height = 5, width = 5, dpi = 300)With python
Clean env
Venn
import pandas as pd
from matplotlib_venn import venn2
import matplotlib.pyplot as plt
# File paths
path_outflank = 'output/outflank/man_aut_SNPs_outFlank.txt'
path_pcadapt = 'output/pcadapt/man_aut_SNPs_pcadapt.txt'
pdf_output_path = 'output/pcadapt/venn_MAN_AUT_pcadapt_outflank.pdf' # PDF output file path
# Read the data (assuming the SNP IDs are in the first column)
outflank_snps = pd.read_csv(path_outflank, sep="\t", header=None)
pcadapt_snps = pd.read_csv(path_pcadapt, sep="\t", header=None)
# Drop NA values if any
outflank_snps.dropna(inplace=True)
pcadapt_snps.dropna(inplace=True)
# Create sets from the first column of each dataframe
set_outflank = set(outflank_snps[0])
set_pcadapt = set(pcadapt_snps[0])
# Create a Venn diagram
venn2([set_outflank, set_pcadapt], ('outFlank', 'pcadapt'), set_colors=('yellow', 'gray'))## <matplotlib_venn._common.VennDiagram object at 0x7fcda216fe80>

# Clearing the environment
del path_outflank, path_pcadapt, pdf_output_path, outflank_snps, pcadapt_snps, set_outflank, set_pcadapt
plt.clf() # Clear the current figure
plt.cla() # Clear the current axes
# Garbage collection
gc.collect()## 406

Create plot with the shared SNPs
# Filter the dataframe
filtered_df <- pcadapt_res_df |> filter(SNP %in% common_SNPs)
# Create the plot
ggplot(pcadapt_res_df, aes(x = Position, y = -log10(pvalues))) +
geom_point(
aes(color = Chromosome),
data = subset(pcadapt_res_df, highlight == FALSE),
size = .3
) +
geom_point(
color = "magenta",
data = subset(filtered_df, highlight == TRUE),
size = .3
) +
scale_color_manual(values = color_vector, guide = "none") +
labs(x = "Position", y = "-log10(p-value)", caption = "SNPs highlighted in magenta are significant after \n Benjamini-Hochberg adjustment (p<0.001) with pcadapt, \nand were identified as outliers with outFlank") +
facet_wrap( ~ Chromosome, scales = "free_x") +
scale_x_continuous(labels = k_format) +
my_theme() +
theme(
panel.spacing = unit(.2, "lines"),
plot.margin = unit(c(1, 1, 2, 2), "lines"),
plot.caption = element_text(face = "italic")
) +
# Annotate only the SNP with the smallest p-value for each chromosome among the associated SNPs
geom_text_repel(
data = min_pval_snps,
aes(label = SNP),
size = 3,
nudge_y = 0.01,
segment.color = NA,
max.overlaps = Inf # Set to 'Inf' to show all annotations
)# Save the plot
ggsave(
here(
"output", "pcadapt", "figures", "man_aut_pcadapt_outflank.pdf"
),
width = 8,
height = 5,
units = "in"
)We can create windows and count how many highlighted SNPs we have in each window. Then we can focus in genomic areas with several SNPs
cluster_df <- filtered_df %>%
mutate(Window_Start = floor(Position / 10000000) * 10000000) %>%
group_by(Chromosome, Window_Start) %>%
summarise(SNP_count = n(), .groups = 'drop')
# Create the plot
ggplot(pcadapt_res_df, aes(x = Position, y = -log10(pvalues))) +
geom_point(
aes(color = Chromosome),
data = subset(pcadapt_res_df, highlight == FALSE),
size = .3
) +
geom_point(
color = "magenta",
data = subset(filtered_df, highlight == TRUE),
size = .3
) +
geom_vline(
data = cluster_df,
aes(xintercept = Window_Start + 5000000),
color = "lightgray",
linetype = "dashed",
linewidth = 0.5
) +
geom_text_repel(
data = cluster_df,
aes(x = Window_Start + 5000000, y = 15, label = SNP_count),
vjust = -1,
hjust = -0.5
) +
scale_color_manual(values = color_vector, guide = "none") +
labs(x = "Position", y = "-log10(p-value)", caption = "SNPs highlighted in magenta are significant after \n Benjamini-Hochberg adjustment (p<0.001) with pcadapt, \nand were identified as outliers with outFlank \n Gray bars represent 10Mb windows and \n the number of SNPs per window is annotated in black.") +
facet_wrap( ~ Chromosome, scales = "free_x") +
scale_x_continuous(labels = k_format) +
my_theme() +
theme(
panel.spacing = unit(.2, "lines"),
plot.margin = unit(c(1, 1, 2, 2), "lines"),
plot.caption = element_text(face = "italic"),
legend.position = "none" # Remove legend
) +
geom_text_repel(
data = min_pval_snps,
aes(label = SNP),
size = 3,
nudge_y = 0.01,
segment.color = NA,
max.overlaps = Inf # Set to 'Inf' to show all annotations
)Check the SNP ids for each cluster
# Create a new dataframe to capture SNPs within each 100kb window for each chromosome
cluster_snps_df <- filtered_df %>%
mutate(Window_Start = floor(Position / 10000000) * 10000000) %>%
group_by(Chromosome, Window_Start) %>%
summarise(SNPs = list(SNP), .groups = 'drop')
# Unnest the SNPs column to show each SNP on its own row
unnested_cluster_snps_df <- cluster_snps_df %>%
unnest(SNPs)
# View the unnested data
head(unnested_cluster_snps_df)## # A tibble: 6 × 3
## Chromosome Window_Start SNPs
## <chr> <dbl> <chr>
## 1 1 0 AX-583054970
## 2 1 10000000 AX-583095890
## 3 1 90000000 AX-583324654
## 4 1 120000000 AX-583423493
## 5 1 120000000 AX-583426050
## 6 1 130000000 AX-583467551
# Convert the list column to a string column
cluster_snps_df_str <- cluster_snps_df %>%
mutate(SNPs = sapply(SNPs, function(x) paste(x, collapse = ", ")))
# Create the flextable
my_flextable <- flextable(cluster_snps_df_str, cwidth = 2.75)
my_flextable <- autofit(my_flextable)
# Display the flextable
my_flextableChromosome | Window_Start | SNPs |
|---|---|---|
1 | 0 | AX-583054970 |
1 | 10,000,000 | AX-583095890 |
1 | 90,000,000 | AX-583324654 |
1 | 120,000,000 | AX-583423493, AX-583426050 |
1 | 130,000,000 | AX-583467551, AX-583504302 |
1 | 140,000,000 | AX-583514148, AX-583515734, AX-583518994, AX-583517999 |
1 | 170,000,000 | AX-583631472 |
1 | 270,000,000 | AX-583905588, AX-583924569 |
1 | 340,000,000 | AX-584133664 |
2 | 20,000,000 | AX-584419201 |
2 | 30,000,000 | AX-584444558, AX-584453518 |
2 | 40,000,000 | AX-584473131 |
2 | 50,000,000 | AX-585010439, AX-584502184, AX-585023213 |
2 | 70,000,000 | AX-585042322 |
2 | 80,000,000 | AX-584546335 |
2 | 110,000,000 | AX-585101539 |
2 | 170,000,000 | AX-585182739 |
2 | 180,000,000 | AX-585190366, AX-585196879 |
2 | 200,000,000 | AX-582494788, AX-582519052, AX-582532563 |
2 | 260,000,000 | AX-584765066 |
2 | 320,000,000 | AX-585413917 |
2 | 350,000,000 | AX-584951049 |
2 | 360,000,000 | AX-579474142, AX-579474650 |
2 | 370,000,000 | AX-579509845 |
2 | 380,000,000 | AX-579548089 |
2 | 390,000,000 | AX-579560686, AX-579580051, AX-579584369, AX-579583040, AX-579584883 |
2 | 400,000,000 | AX-579604213, AX-579603553, AX-579607140, AX-579606795, AX-579619995, AX-579620627 |
2 | 410,000,000 | AX-579630462, AX-579630465, AX-579632196, AX-579632927, AX-579636106, AX-579638540, AX-579640459, AX-579646083, AX-579661979, AX-579662371 |
2 | 450,000,000 | AX-579768613, AX-579778684 |
2 | 460,000,000 | AX-579808128, AX-579816168 |
2 | 490,000,000 | AX-579909895 |
2 | 510,000,000 | AX-579955661, AX-579982534 |
2 | 520,000,000 | AX-580001880, AX-580026657, AX-580027537, AX-580042929 |
2 | 540,000,000 | AX-580099245 |
2 | 550,000,000 | AX-580165355 |
2 | 560,000,000 | AX-580195612 |
2 | 580,000,000 | AX-580318010, AX-580321888 |
3 | 0 | AX-580342672 |
3 | 20,000,000 | AX-580425125 |
3 | 70,000,000 | AX-580620056 |
3 | 80,000,000 | AX-580673467 |
3 | 90,000,000 | AX-580719743 |
3 | 130,000,000 | AX-580926966, AX-580930203 |
3 | 180,000,000 | AX-581275893 |
3 | 190,000,000 | AX-581298415, AX-581302704 |
3 | 200,000,000 | AX-581399096 |
3 | 210,000,000 | AX-581408938, AX-581413330, AX-581437212, AX-581438408, AX-581442470, AX-581458697, AX-581461793, AX-581462648, AX-581467653, AX-581470736 |
3 | 220,000,000 | AX-581485809, AX-581488185 |
3 | 230,000,000 | AX-581572119 |
3 | 280,000,000 | AX-581761610 |
3 | 350,000,000 | AX-582085824 |
3 | 380,000,000 | AX-582237115 |
3 | 440,000,000 | AX-582822062, AX-582853985 |
3 | 450,000,000 | AX-582884221 |
3 | 480,000,000 | AX-583025646 |
pcadapt performs principal component analysis and computes p-values to test for outliers. The test for outliers is based on the correlations between genetic variation and the first K principal components. pcadapt also handles Pool-seq data for which the statistical analysis is performed on the genetic markers frequencies. Returns an object of class pcadapt.
Clean env and memory
# Remove all objects from the environment
rm(list = ls())
# Run the garbage collector to free up memory
gc()## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 3133463 167.4 4677312 249.8 NA 4677312 249.8
## Vcells 6176454 47.2 21055934 160.7 32768 21055500 160.7
# Run pcadapt with a large number of PCs
pcadapt_res <- pcadapt(x, K = 30)
# Create a scree plot
plot(pcadapt_res, option = "screeplot")Choose PCs
# perform the analysis
pcadapt_res <- pcadapt(
x,
method = c("mahalanobis"),
min.maf = 0.1,
LD.clumping = NULL,
tol = 1e-04,
K = 2) # choose the right K for your data
str(pcadapt_res)## List of 11
## $ scores : num [1:50, 1:2] -0.1556 -0.1445 -0.0718 -0.116 -0.1717 ...
## $ singular.values: num [1:2] 0.408 0.293
## $ loadings : num [1:41468, 1:2] -0.00753 NA 0.00585 0.00398 0.00379 ...
## $ zscores : num [1:41468, 1:2] -5.38 NA 2.9 2.27 1.94 ...
## $ af : num [1:41468] 0.888 0.967 0.728 0.86 0.6 ...
## $ maf : num [1:41468] 0.1122 0.0326 0.2717 0.14 0.4 ...
## $ chi2.stat : num [1:41468] 6.86 NA 0.892 0.624 0.381 ...
## $ stat : num [1:41468] 9.431 NA 1.227 0.857 0.524 ...
## $ gif : num 1.37
## $ pvalues : num [1:41468] 0.0324 NA 0.6401 0.7322 0.8265 ...
## $ pass : int [1:30833] 1 3 4 5 6 7 8 9 10 12 ...
## - attr(*, "K")= num 2
## - attr(*, "method")= chr "mahalanobis"
## - attr(*, "min.maf")= num 0.1
## - attr(*, "class")= chr "pcadapt"
# ____________________________________________________________________________
# import the bim file with the SNP data ####
snps <-
read_delim( # to learn about the options use here, run ?read_delim on the console.
here(
"output", "outflank", "new_aut.bim"
), # use library here to load it
col_names = FALSE, # we don't have header in the input file
show_col_types = FALSE, # suppress message from read_delim
col_types = "ccidcc" # set the class of each column
)
#
# set column names
colnames(
snps
) <- # to add a header in our tibble
c(
"Scaffold", "SNP", "Cm", "Position", "Allele1", "Allele2"
)
#
# check the tibble
head(snps)## # A tibble: 6 × 6
## Scaffold SNP Cm Position Allele1 Allele2
## <chr> <chr> <int> <dbl> <chr> <chr>
## 1 1 AX-581444870 0 97856 C T
## 2 1 AX-583035083 0 305518 A G
## 3 1 AX-583035102 0 308124 A G
## 4 1 AX-583033342 0 315059 C G
## 5 1 AX-583035163 0 315386 A G
## 6 1 AX-583035198 0 330908 G T
# Combine into a data frame
pcadapt_res_df <- data.frame(
SNP = snps$SNP,
Chromosome = snps$Scaffold,
Position = snps$Position,
pvalues = pcadapt_res$pvalues,
stat = pcadapt_res$stat
) |> drop_na()
head(pcadapt_res_df)## SNP Chromosome Position pvalues stat
## 1 AX-581444870 1 97856 0.03239121 9.4305815
## 2 AX-583035102 1 308124 0.64013391 1.2265118
## 3 AX-583033342 1 315059 0.73215941 0.8571903
## 4 AX-583035163 1 315386 0.82646667 0.5240517
## 5 AX-583035198 1 330908 0.55989336 1.5947615
## 6 AX-583035257 1 442875 0.28774446 3.4250617
Adjust
# Adjust the p-values using Benjamini-Hochberg method
padj <- p.adjust(pcadapt_res_df$pvalues,method="BH")
# Define significance level
alpha <- 0.05
# Get indices of significant SNPs
outliers_indices <- which(padj < alpha)
# Get the SNP IDs of the outliers
outlier_snps <- pcadapt_res_df$SNP[outliers_indices]
# Print the SNP IDs
length(outlier_snps)## [1] 67
# Save it
write.table(
outlier_snps,
file = here("output", "pcadapt","new_aut_SNPs_pcadapt.txt"),
row.names = FALSE,
quote = FALSE,
col.names = FALSE,
sep = "\n"
)# Set colors for the chromosomes
color_vector <- c("#CCF6D6", "#F6E1CC", "#CCD8F6") # Add or remove colors as needed.
names(color_vector) <- unique(pcadapt_res_df$Chromosome) # Make sure unique(df_sub$Chromosome) gives all unique Chromosome values.
# Adjust the p-values using Benjamini-Hochberg method
padj <- p.adjust(pcadapt_res_df$pvalues,method="BH")
# Define significance level
alpha <- 0.05
# Get indices of significant SNPs
outliers_indices <- which(padj < alpha)
# Create a new 'highlight' column, initially set to FALSE
pcadapt_res_df$highlight <- FALSE
# Update the 'highlight' column to highlight these outlier SNPs
pcadapt_res_df$highlight[outliers_indices] <- TRUE
# Identify the SNP with the smallest p-value in each chromosome among the associated SNPs
min_pval_snps <- pcadapt_res_df |>
filter(highlight) |>
group_by(Chromosome) |>
slice(which.min(pvalues))
# Custom label function
k_format <- function(x) {
paste0(format(x / 1e6, big.mark = "", scientific = FALSE), "k")
}
# source the plotting function
source(
here(
"scripts", "analysis", "my_theme2.R"
)
)
# Create the plot
ggplot(pcadapt_res_df, aes(x = Position, y = -log10(pvalues))) +
geom_point(aes(color = Chromosome),
data = subset(pcadapt_res_df, highlight == FALSE),
size = .5) +
geom_point(
color = "magenta",
data = subset(pcadapt_res_df, highlight == TRUE),
size = .5
) +
scale_color_manual(values = color_vector, guide = "none") +
labs(title = "pcadapt Brazil", x = "Position", y = "-log10(p-value)", caption = "SNPs highlighted in magenta are significant after Benjamini-Hochberg adjustment (p<0.05).") +
facet_wrap(~ Chromosome, scales = "free_x") +
scale_x_continuous(labels = k_format) +
my_theme() +
theme(panel.spacing = unit(.2, "lines"),
plot.margin = unit(c(1, 1, 2, 2), "lines"),
plot.caption = element_markdown(face = "italic", color = "#574E4E")) +
# Annotate only the SNP with the smallest p-value for each chromosome among the associated SNPs
geom_text_repel(
data = min_pval_snps,
aes(label = SNP),
size = 3,
nudge_y = 0.01,
segment.color = NA,
max.overlaps = Inf # Set to 'Inf' to show all annotations
)# Save the plot
ggsave(
here(
"output", "pcadapt", "figures", "new_aut_pcadapt.pdf"
),
width = 8,
height = 5,
units = "in"
)Choosing a cutoff for outlier detection
q-values
qval <- qvalue(pcadapt_res$pvalues)$qvalues
alpha <- 0.05
outliers <- which(qval < alpha)
length(outliers)## [1] 67
Benjamini-Hochberg Procedure
padj <- p.adjust(pcadapt_res$pvalues,method="BH")
alpha <- 0.05
outliers <- which(padj < alpha)
length(outliers)## [1] 67
Bonferroni correction
padj <- p.adjust(pcadapt_res$pvalues,method="bonferroni")
alpha <- 0.05
outliers <- which(padj < alpha)
length(outliers)## [1] 17
Loadings
par(mfrow = c(2, 2))
for (i in 1:2)
plot(pcadapt_res$loadings[, i], pch = 19, cex = .3, ylab = paste0("Loadings PC", i))# Read data from txt files
outflank_snps <-
read.table(
here("output", "outflank", "new_aut_SNPs_outFlank.txt"),
stringsAsFactors = FALSE
) |>
drop_na()
pcadapt_snps <-
read.table(
here("output", "pcadapt", "new_aut_SNPs_pcadapt.txt"),
stringsAsFactors = FALSE
)
# Create a list with all dataframes
list_of_clusters <- list("outFlank" = outflank_snps$V1, "pcadapt" = pcadapt_snps$V1)
# Create Venn diagram
venn_diagram <- ggvenn(list_of_clusters, fill_color = c("steelblue", "darkorange"))
print(venn_diagram)# Find common SNPs
common_SNPs <- Reduce(intersect, list_of_clusters)
# Save the shared SNP ids into a txt file
write.table(
common_SNPs,
file = here("output", "pcadapt", "new_aut_common_SNPs_pcadapt_outflank.txt"),
row.names = FALSE,
col.names = FALSE,
quote = FALSE
)
# # Save Venn diagram to PDF
output_path <- here("output", "pcadapt", "figures", "new_aut_significant_snps.pdf")
ggsave(output_path, venn_diagram, height = 5, width = 5, dpi = 300)With python
Clean env
Venn
import pandas as pd
from matplotlib_venn import venn2
import matplotlib.pyplot as plt
# File paths
path_outflank = 'output/outflank/new_aut_SNPs_outFlank.txt'
path_pcadapt = 'output/pcadapt/new_aut_SNPs_pcadapt.txt'
pdf_output_path = 'output/pcadapt/venn_NEW_AUT_pcadapt_outflank.pdf' # PDF output file path
# Read the data (assuming the SNP IDs are in the first column)
outflank_snps = pd.read_csv(path_outflank, sep="\t", header=None)
pcadapt_snps = pd.read_csv(path_pcadapt, sep="\t", header=None)
# Drop NA values if any
outflank_snps.dropna(inplace=True)
pcadapt_snps.dropna(inplace=True)
# Create sets from the first column of each dataframe
set_outflank = set(outflank_snps[0])
set_pcadapt = set(pcadapt_snps[0])
# Create a Venn diagram
venn2([set_outflank, set_pcadapt], ('outFlank', 'pcadapt'), set_colors=('green', 'gray'))## <matplotlib_venn._common.VennDiagram object at 0x7fcdc0dabd90>

# Clearing the environment
del path_outflank, path_pcadapt, pdf_output_path, outflank_snps, pcadapt_snps, set_outflank, set_pcadapt
plt.clf() # Clear the current figure
plt.cla() # Clear the current axes
# Garbage collection
gc.collect()## 401

Create plot with the shared SNPs
# Filter the dataframe
filtered_df <- pcadapt_res_df |> filter(SNP %in% common_SNPs)
# Create the plot
ggplot(pcadapt_res_df, aes(x = Position, y = -log10(pvalues))) +
geom_point(
aes(color = Chromosome),
data = subset(pcadapt_res_df, highlight == FALSE),
size = .3
) +
geom_point(
color = "magenta",
data = subset(filtered_df, highlight == TRUE),
size = .3
) +
scale_color_manual(values = color_vector, guide = "none") +
labs(x = "Position", y = "-log10(p-value)", caption = "SNPs highlighted in magenta are significant after \n Benjamini-Hochberg adjustment (p<0.001) with pcadapt, \nand were identified as outliers with outFlank") +
facet_wrap( ~ Chromosome, scales = "free_x") +
scale_x_continuous(labels = k_format) +
my_theme() +
theme(
panel.spacing = unit(.2, "lines"),
plot.margin = unit(c(1, 1, 2, 2), "lines"),
plot.caption = element_text(face = "italic")
) +
# Annotate only the SNP with the smallest p-value for each chromosome among the associated SNPs
geom_text_repel(
data = min_pval_snps,
aes(label = SNP),
size = 3,
nudge_y = 0.01,
segment.color = NA,
max.overlaps = Inf # Set to 'Inf' to show all annotations
)# Save the plot
ggsave(
here(
"output", "pcadapt", "figures", "new_aut_pcadapt_outflank.pdf"
),
width = 8,
height = 5,
units = "in"
)We can create windows and count how many highlighted SNPs we have in each window. Then we can focus in genomic areas with several SNPs
cluster_df <- filtered_df %>%
mutate(Window_Start = floor(Position / 10000000) * 10000000) %>%
group_by(Chromosome, Window_Start) %>%
summarise(SNP_count = n(), .groups = 'drop')
# Create the plot
ggplot(pcadapt_res_df, aes(x = Position, y = -log10(pvalues))) +
geom_point(
aes(color = Chromosome),
data = subset(pcadapt_res_df, highlight == FALSE),
size = .3
) +
geom_point(
color = "magenta",
data = subset(filtered_df, highlight == TRUE),
size = .3
) +
geom_vline(
data = cluster_df,
aes(xintercept = Window_Start + 5000000),
color = "lightgray",
linetype = "dashed",
linewidth = 0.5
) +
geom_text_repel(
data = cluster_df,
aes(x = Window_Start + 5000000, y = 15, label = SNP_count),
vjust = -1,
hjust = -0.5
) +
scale_color_manual(values = color_vector, guide = "none") +
labs(x = "Position", y = "-log10(p-value)", caption = "SNPs highlighted in magenta are significant after \n Benjamini-Hochberg adjustment (p<0.001) with pcadapt, \nand were identified as outliers with outFlank \n Gray bars represent 10Mb windows and \n the number of SNPs per window is annotated in black.") +
facet_wrap( ~ Chromosome, scales = "free_x") +
scale_x_continuous(labels = k_format) +
my_theme() +
theme(
panel.spacing = unit(.2, "lines"),
plot.margin = unit(c(1, 1, 2, 2), "lines"),
plot.caption = element_text(face = "italic"),
legend.position = "none" # Remove legend
) +
geom_text_repel(
data = min_pval_snps,
aes(label = SNP),
size = 3,
nudge_y = 0.01,
segment.color = NA,
max.overlaps = Inf # Set to 'Inf' to show all annotations
)Check the SNP ids for each cluster
# Create a new dataframe to capture SNPs within each 100kb window for each chromosome
cluster_snps_df <- filtered_df %>%
mutate(Window_Start = floor(Position / 10000000) * 10000000) %>%
group_by(Chromosome, Window_Start) %>%
summarise(SNPs = list(SNP), .groups = 'drop')
# Unnest the SNPs column to show each SNP on its own row
unnested_cluster_snps_df <- cluster_snps_df %>%
unnest(SNPs)
# View the unnested data
head(unnested_cluster_snps_df)## # A tibble: 6 × 3
## Chromosome Window_Start SNPs
## <chr> <dbl> <chr>
## 1 1 140000000 AX-583515734
## 2 1 140000000 AX-583518586
## 3 1 140000000 AX-583516491
## 4 1 140000000 AX-583517344
## 5 1 140000000 AX-583521326
## 6 1 140000000 AX-583531703
# Convert the list column to a string column
cluster_snps_df_str <- cluster_snps_df %>%
mutate(SNPs = sapply(SNPs, function(x) paste(x, collapse = ", ")))
# Create the flextable
my_flextable <- flextable(cluster_snps_df_str, cwidth = 2.75)
my_flextable <- autofit(my_flextable)
# Display the flextable
my_flextableChromosome | Window_Start | SNPs |
|---|---|---|
1 | 140,000,000 | AX-583515734, AX-583518586, AX-583516491, AX-583517344, AX-583521326, AX-583531703 |
1 | 220,000,000 | AX-583750740, AX-583750871 |
1 | 260,000,000 | AX-583882507, AX-583878485 |
1 | 270,000,000 | AX-583924978 |
1 | 300,000,000 | AX-583972796 |
2 | 20,000,000 | AX-584399488, AX-584419201 |
2 | 30,000,000 | AX-584429924 |
2 | 120,000,000 | AX-585110132 |
2 | 180,000,000 | AX-585196879 |
2 | 190,000,000 | AX-582457468, AX-582465641 |
2 | 200,000,000 | AX-582535272 |
2 | 320,000,000 | AX-584907866 |
2 | 380,000,000 | AX-579548089 |
2 | 390,000,000 | AX-579556477, AX-579560686, AX-579564292, AX-579565469, AX-579584369, AX-579583040, AX-579584883 |
2 | 400,000,000 | AX-579596233, AX-579602153, AX-579618571 |
2 | 410,000,000 | AX-579630465, AX-579632196, AX-579632983, AX-579632074, AX-579632927, AX-579661979 |
2 | 420,000,000 | AX-579681911, AX-579693902, AX-579697016 |
2 | 500,000,000 | AX-579940789 |
2 | 570,000,000 | AX-580238721 |
3 | 0 | AX-580344997 |
3 | 10,000,000 | AX-580398903 |
3 | 30,000,000 | AX-580480108 |
3 | 40,000,000 | AX-580560428 |
3 | 50,000,000 | AX-580575055 |
3 | 130,000,000 | AX-580917486, AX-580925458 |
3 | 150,000,000 | AX-581030099 |
3 | 180,000,000 | AX-581275893 |
3 | 190,000,000 | AX-581302901 |
3 | 210,000,000 | AX-581428225, AX-581437212, AX-581438408, AX-581442470, AX-581462648, AX-581467653 |
3 | 230,000,000 | AX-581528715, AX-581534474, AX-581543825 |
3 | 290,000,000 | AX-581810815 |
3 | 320,000,000 | AX-581929530 |
pcadapt performs principal component analysis and computes p-values to test for outliers. The test for outliers is based on the correlations between genetic variation and the first K principal components. pcadapt also handles Pool-seq data for which the statistical analysis is performed on the genetic markers frequencies. Returns an object of class pcadapt.
Clean env and memory
# Remove all objects from the environment
rm(list = ls())
# Run the garbage collector to free up memory
gc()## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 3134946 167.5 4677312 249.8 NA 4677312 249.8
## Vcells 5953847 45.5 21055934 160.7 32768 21055934 160.7
# Run pcadapt with a large number of PCs
pcadapt_res <- pcadapt(x, K = 30)
# Create a scree plot
plot(pcadapt_res, option = "screeplot")Choose PCs
# perform the analysis
pcadapt_res <- pcadapt(
x,
method = c("mahalanobis"),
min.maf = 0.1,
LD.clumping = NULL,
tol = 1e-04,
K = 3) # choose the right K for your data
str(pcadapt_res)## List of 11
## $ scores : num [1:32, 1:3] -0.0626 -0.1343 -0.0743 -0.1028 -0.0451 ...
## $ singular.values: num [1:3] 0.361 0.335 0.26
## $ loadings : num [1:40985, 1:3] 0.00293 NA 0.0055 0.0033 0.00196 ...
## $ zscores : num [1:40985, 1:3] 1.298 NA 1.979 1.322 0.746 ...
## $ af : num [1:40985] 0.774 0.981 0.783 0.891 0.703 ...
## $ maf : num [1:40985] 0.2258 0.0185 0.2167 0.1094 0.2969 ...
## $ chi2.stat : num [1:40985] 1.751 NA 2.479 1.159 0.432 ...
## $ stat : num [1:40985] 2.208 NA 3.127 1.462 0.545 ...
## $ gif : num 1.26
## $ pvalues : num [1:40985] 0.626 NA 0.479 0.763 0.934 ...
## $ pass : int [1:33022] 1 3 4 5 6 7 8 9 10 12 ...
## - attr(*, "K")= num 3
## - attr(*, "method")= chr "mahalanobis"
## - attr(*, "min.maf")= num 0.1
## - attr(*, "class")= chr "pcadapt"
# ____________________________________________________________________________
# import the bim file with the SNP data ####
snps <-
read_delim( # to learn about the options use here, run ?read_delim on the console.
here(
"output", "outflank", "man_new.bim"
), # use library here to load it
col_names = FALSE, # we don't have header in the input file
show_col_types = FALSE, # suppress message from read_delim
col_types = "ccidcc" # set the class of each column
)
#
# set column names
colnames(
snps
) <- # to add a header in our tibble
c(
"Scaffold", "SNP", "Cm", "Position", "Allele1", "Allele2"
)
#
# check the tibble
head(snps)## # A tibble: 6 × 6
## Scaffold SNP Cm Position Allele1 Allele2
## <chr> <chr> <int> <dbl> <chr> <chr>
## 1 1 AX-581444870 0 97856 C T
## 2 1 AX-583035083 0 305518 A G
## 3 1 AX-583035102 0 308124 A G
## 4 1 AX-583033342 0 315059 C G
## 5 1 AX-583035163 0 315386 A G
## 6 1 AX-583035198 0 330908 G T
# Combine into a data frame
pcadapt_res_df <- data.frame(
SNP = snps$SNP,
Chromosome = snps$Scaffold,
Position = snps$Position,
pvalues = pcadapt_res$pvalues,
stat = pcadapt_res$stat
) |> drop_na()
head(pcadapt_res_df)## SNP Chromosome Position pvalues stat
## 1 AX-581444870 1 97856 0.6257264 2.2084728
## 2 AX-583035102 1 308124 0.4791261 3.1270554
## 3 AX-583033342 1 315059 0.7628384 1.4621540
## 4 AX-583035163 1 315386 0.9335466 0.5450194
## 5 AX-583035198 1 330908 0.8435517 1.0403490
## 6 AX-583035257 1 442875 0.8339275 1.0908654
Adjust
# Adjust the p-values using Benjamini-Hochberg method
padj <- p.adjust(pcadapt_res_df$pvalues,method="BH")
# Define significance level
alpha <- 0.05
# Get indices of significant SNPs
outliers_indices <- which(padj < alpha)
# Get the SNP IDs of the outliers
outlier_snps <- pcadapt_res_df$SNP[outliers_indices]
# Print the SNP IDs
length(outlier_snps)## [1] 79
# Save it
write.table(
outlier_snps,
file = here("output", "pcadapt","man_new_SNPs_pcadapt.txt"),
row.names = FALSE,
quote = FALSE,
col.names = FALSE,
sep = "\n"
)# Set colors for the chromosomes
color_vector <- c("#CCF6D6", "#F6E1CC", "#CCD8F6") # Add or remove colors as needed.
names(color_vector) <- unique(pcadapt_res_df$Chromosome) # Make sure unique(df_sub$Chromosome) gives all unique Chromosome values.
# Adjust the p-values using Benjamini-Hochberg method
padj <- p.adjust(pcadapt_res_df$pvalues,method="BH")
# Define significance level
alpha <- 0.05
# Get indices of significant SNPs
outliers_indices <- which(padj < alpha)
# Create a new 'highlight' column, initially set to FALSE
pcadapt_res_df$highlight <- FALSE
# Update the 'highlight' column to highlight these outlier SNPs
pcadapt_res_df$highlight[outliers_indices] <- TRUE
# Identify the SNP with the smallest p-value in each chromosome among the associated SNPs
min_pval_snps <- pcadapt_res_df |>
filter(highlight) |>
group_by(Chromosome) |>
slice(which.min(pvalues))
# Custom label function
k_format <- function(x) {
paste0(format(x / 1e6, big.mark = "", scientific = FALSE), "k")
}
# source the plotting function
source(
here(
"scripts", "analysis", "my_theme2.R"
)
)
# Create the plot
ggplot(pcadapt_res_df, aes(x = Position, y = -log10(pvalues))) +
geom_point(aes(color = Chromosome),
data = subset(pcadapt_res_df, highlight == FALSE),
size = .5) +
geom_point(
color = "magenta",
data = subset(pcadapt_res_df, highlight == TRUE),
size = .5
) +
scale_color_manual(values = color_vector, guide = "none") +
labs(title = "pcadapt Brazil", x = "Position", y = "-log10(p-value)", caption = "SNPs highlighted in magenta are significant after Benjamini-Hochberg adjustment (p<0.05).") +
facet_wrap(~ Chromosome, scales = "free_x") +
scale_x_continuous(labels = k_format) +
my_theme() +
theme(panel.spacing = unit(.2, "lines"),
plot.margin = unit(c(1, 1, 2, 2), "lines"),
plot.caption = element_markdown(face = "italic", color = "#574E4E")) +
# Annotate only the SNP with the smallest p-value for each chromosome among the associated SNPs
geom_text_repel(
data = min_pval_snps,
aes(label = SNP),
size = 3,
nudge_y = 0.01,
segment.color = NA,
max.overlaps = Inf # Set to 'Inf' to show all annotations
)# Save the plot
ggsave(
here(
"output", "pcadapt", "figures", "man_new_pcadapt.pdf"
),
width = 8,
height = 5,
units = "in"
)Choosing a cutoff for outlier detection
q-values
qval <- qvalue(pcadapt_res$pvalues)$qvalues
alpha <- 0.05
outliers <- which(qval < alpha)
length(outliers)## [1] 88
Benjamini-Hochberg Procedure
padj <- p.adjust(pcadapt_res$pvalues,method="BH")
alpha <- 0.05
outliers <- which(padj < alpha)
length(outliers)## [1] 79
Bonferroni correction
padj <- p.adjust(pcadapt_res$pvalues,method="bonferroni")
alpha <- 0.05
outliers <- which(padj < alpha)
length(outliers)## [1] 21
Loadings
par(mfrow = c(2, 2))
for (i in 1:2)
plot(pcadapt_res$loadings[, i], pch = 19, cex = .3, ylab = paste0("Loadings PC", i))We can make a venn diagram because outFlank did not identify any outlier.
# Read data from txt files
NEW_MAN_AUT <-
read.table(
here("output", "pcadapt", "common_SNPs_pcadapt_outflank.txt"),
stringsAsFactors = FALSE
) |>
drop_na()
MAN_AUT <-
read.table(
here("output", "pcadapt", "man_aut_common_SNPs_pcadapt_outflank.txt"),
stringsAsFactors = FALSE
)
NEW_AUT <-
read.table(
here("output", "pcadapt", "new_aut_common_SNPs_pcadapt_outflank.txt"),
stringsAsFactors = FALSE
)
NEW_MAN <-
read.table(
here("output", "pcadapt", "man_new_SNPs_pcadapt.txt"),
stringsAsFactors = FALSE
)
# Create a list with all dataframes
list_of_clusters <- list("NEW_MAN_AUT" = NEW_MAN_AUT$V1, "MAN_AUT" = MAN_AUT$V1, "NEW_AUT" = NEW_AUT$V1, "NEW_MAN" = NEW_MAN$V1)
# Create Venn diagram
venn_diagram <- ggvenn(list_of_clusters)
# Increase plot margins
venn_diagram <- venn_diagram + theme(plot.margin = margin(60, 60, 60, 60, "points"))
# Resize text
venn_diagram <- venn_diagram + theme(text = element_text(size = 5))
# Print the adjusted plot
print(venn_diagram)# Find the intersect of NEW_MAN_AUT, MAN_AUT, and NEW_AUT
common_elements <- Reduce(intersect, list(NEW_MAN_AUT$V1, MAN_AUT$V1, NEW_AUT$V1))
# # Save the shared SNP ids into a txt file
write.table(
common_elements,
file = here("output", "pcadapt", "4_way_venn_common_SNPs_pcadapt_outflank.txt"),
row.names = FALSE,
col.names = FALSE,
quote = FALSE
)
# # Save Venn diagram to PDF
output_path <- here("output", "pcadapt", "figures", "4_way_venn_significant_snps.pdf")
ggsave(output_path, venn_diagram, height = 8, width = 8, dpi = 300)With python Create plot with the shared SNPs
# Filter the dataframe
filtered_df <- pcadapt_res_df |>
filter(SNP %in% common_elements)
# Create the plot
ggplot(pcadapt_res_df, aes(x = Position, y = -log10(pvalues))) +
geom_point(
aes(color = Chromosome),
data = subset(pcadapt_res_df, highlight == FALSE),
size = .3
) +
geom_point(
color = "magenta",
data = subset(filtered_df, highlight == TRUE),
size = .3
) +
scale_color_manual(values = color_vector, guide = "none") +
labs(x = "Position", y = "-log10(p-value)") +
facet_wrap( ~ Chromosome, scales = "free_x") +
scale_x_continuous(labels = k_format) +
my_theme() +
theme(
panel.spacing = unit(.2, "lines"),
plot.margin = unit(c(1, 1, 2, 2), "lines"),
plot.caption = element_text(face = "italic")
)We can create windows and count how many highlighted SNPs we have in each window. Then we can focus in genomic areas with several SNPs
cluster_df <- filtered_df %>%
mutate(Window_Start = floor(Position / 10000000) * 10000000) %>%
group_by(Chromosome, Window_Start) %>%
summarise(SNP_count = n(), .groups = 'drop')
# Create the plot
ggplot(pcadapt_res_df, aes(x = Position, y = -log10(pvalues))) +
geom_point(
aes(color = Chromosome),
data = subset(pcadapt_res_df, highlight == FALSE),
size = .3
) +
geom_point(
color = "magenta",
data = subset(filtered_df, highlight == TRUE),
size = .3
) +
geom_vline(
data = cluster_df,
aes(xintercept = Window_Start + 5000000),
color = "lightgray",
linetype = "dashed",
linewidth = 0.5
) +
geom_text_repel(
data = cluster_df,
aes(x = Window_Start + 5000000, y = 10, label = SNP_count),
vjust = -1,
hjust = -0.5
) +
scale_color_manual(values = color_vector, guide = "none") +
labs(x = "Position", y = "-log10(p-value)", caption = "SNPs highlighted in magenta are significant after \n Benjamini-Hochberg adjustment (p<0.001) with pcadapt, \nand were identified as outliers with outFlank \n Gray bars represent 10Mb windows and \n the number of SNPs per window is annotated in black.") +
facet_wrap( ~ Chromosome, scales = "free_x") +
scale_x_continuous(labels = k_format) +
my_theme() +
theme(
panel.spacing = unit(.2, "lines"),
plot.margin = unit(c(1, 1, 2, 2), "lines"),
plot.caption = element_text(face = "italic"),
legend.position = "none" # Remove legend
) # # Save Venn diagram to PDF
output_path <- here("output", "pcadapt", "figures", "manhattan_from_4_way_venn_snps.pdf")
ggsave(output_path, height = 5, width = 8, dpi = 300)Check the SNP ids for each cluster
# Create a new dataframe to capture SNPs within each 100kb window for each chromosome
cluster_snps_df <- filtered_df %>%
mutate(Window_Start = floor(Position / 10000000) * 10000000) %>%
group_by(Chromosome, Window_Start) %>%
summarise(SNPs = list(SNP), .groups = 'drop')
# Unnest the SNPs column to show each SNP on its own row
unnested_cluster_snps_df <- cluster_snps_df %>%
unnest(SNPs)
# View the unnested data
head(unnested_cluster_snps_df)## # A tibble: 6 × 3
## Chromosome Window_Start SNPs
## <chr> <dbl> <chr>
## 1 2 180000000 AX-585196879
## 2 2 380000000 AX-579548089
## 3 2 390000000 AX-579560686
## 4 2 410000000 AX-579661979
## 5 3 180000000 AX-581275893
## 6 3 210000000 AX-581442470
# Convert the list column to a string column
cluster_snps_df_str <- cluster_snps_df %>%
mutate(SNPs = sapply(SNPs, function(x) paste(x, collapse = ", ")))
# Create the flextable
my_flextable <- flextable(cluster_snps_df_str, cwidth = 2.75)
my_flextable <- autofit(my_flextable)
# Display the flextable
my_flextableChromosome | Window_Start | SNPs |
|---|---|---|
2 | 180,000,000 | AX-585196879 |
2 | 380,000,000 | AX-579548089 |
2 | 390,000,000 | AX-579560686 |
2 | 410,000,000 | AX-579661979 |
3 | 180,000,000 | AX-581275893 |
3 | 210,000,000 | AX-581442470, AX-581462648, AX-581467653 |
# Initialize a Word document
doc <- read_docx()
# Add flextable to Word document
doc <- body_add_flextable(doc, value = my_flextable)
# Save the Word document
print(doc, target = here("output", "pcadapt", "4_way_venn_pcadap_outflank.docx"))We can also highlight all the 158 SNPs from the selection scans
# Combine all three data frames into one
combined_df <- bind_rows(MAN_AUT, NEW_AUT, NEW_MAN_AUT)
# Count unique values and add a 'Count' column
result <- combined_df %>%
group_by(V1) %>%
summarise(Count = n()) %>%
arrange(V1) |>
dplyr::rename(SNP = V1)
# Show the result
head(result)## # A tibble: 6 × 2
## SNP Count
## <chr> <int>
## 1 AX-579474142 1
## 2 AX-579474650 1
## 3 AX-579509845 1
## 4 AX-579548089 3
## 5 AX-579556477 1
## 6 AX-579560686 3
Create plot with the shared SNPs
# Filter the dataframe
filtered_df <- pcadapt_res_df |>
filter(SNP %in% result$SNP)
# Create the plot
ggplot(pcadapt_res_df, aes(x = Position, y = -log10(pvalues))) +
geom_point(
aes(color = Chromosome),
data = subset(pcadapt_res_df, highlight == FALSE),
size = .3
) +
geom_point(
color = "magenta",
data = subset(filtered_df, highlight == TRUE),
size = .3
) +
scale_color_manual(values = color_vector, guide = "none") +
labs(x = "Position", y = "-log10(p-value)") +
facet_wrap( ~ Chromosome, scales = "free_x") +
scale_x_continuous(labels = k_format) +
my_theme() +
theme(
panel.spacing = unit(.2, "lines"),
plot.margin = unit(c(1, 1, 2, 2), "lines"),
plot.caption = element_text(face = "italic")
)We can create windows and count how many highlighted SNPs we have in each window. Then we can focus in genomic areas with several SNPs
cluster_df <- filtered_df |>
mutate(Window_Start = floor(Position / 10000000) * 10000000) |>
group_by(Chromosome, Window_Start) |>
summarise(SNP_count = n(), .groups = 'drop') |>
dplyr::filter(SNP_count >= 3) # plot lines for windows with 3 or more SNPs
# Create the plot
ggplot(pcadapt_res_df, aes(x = Position, y = -log10(pvalues))) +
geom_point(
aes(color = Chromosome),
data = subset(pcadapt_res_df, highlight == FALSE),
size = .3
) +
geom_point(
color = "magenta",
data = subset(filtered_df, highlight == TRUE),
size = .3
) +
geom_vline(
data = cluster_df,
aes(xintercept = Window_Start + 5000000),
color = "lightgray",
linetype = "dashed",
linewidth = 0.5
) +
geom_text_repel(
data = cluster_df,
aes(x = Window_Start + 5000000, y = 10, label = SNP_count),
vjust = -1,
hjust = -0.5
) +
scale_color_manual(values = color_vector, guide = "none") +
labs(x = "Position", y = "-log10(p-value)", caption = "SNPs highlighted in magenta are significant after \n Benjamini-Hochberg adjustment (p<0.001) with pcadapt, \nand were identified as outliers with outFlank \n Gray bars represent 10Mb windows and \n the number of SNPs per window is annotated in black.") +
facet_wrap( ~ Chromosome, scales = "free_x") +
scale_x_continuous(labels = k_format) +
my_theme() +
theme(
panel.spacing = unit(.2, "lines"),
plot.margin = unit(c(1, 1, 2, 2), "lines"),
plot.caption = element_text(face = "italic"),
legend.position = "none" # Remove legend
) # # Save Venn diagram to PDF
output_path <- here("output", "pcadapt", "figures", "manhattan_from_4_way_venn_158_snps.pdf")
ggsave(output_path, height = 5, width = 8, dpi = 300)Check the SNP ids for each cluster
# Create a new dataframe to capture SNPs within each 100kb window for each chromosome
cluster_snps_df <- filtered_df %>%
mutate(Window_Start = floor(Position / 10000000) * 10000000) %>%
group_by(Chromosome, Window_Start) %>%
summarise(SNPs = list(SNP), .groups = 'drop')
# Unnest the SNPs column to show each SNP on its own row
unnested_cluster_snps_df <- cluster_snps_df %>%
unnest(SNPs)
# View the unnested data
head(unnested_cluster_snps_df)## # A tibble: 6 × 3
## Chromosome Window_Start SNPs
## <chr> <dbl> <chr>
## 1 1 0 AX-583054970
## 2 1 10000000 AX-583095890
## 3 1 90000000 AX-583324654
## 4 1 120000000 AX-583423493
## 5 1 120000000 AX-583426050
## 6 1 130000000 AX-583467551
# Convert the list column to a string column
cluster_snps_df_str <- cluster_snps_df %>%
mutate(SNPs = sapply(SNPs, function(x) paste(x, collapse = ", ")))
# Create the flextable
my_flextable <- flextable(cluster_snps_df_str, cwidth = 2.75)
my_flextable <- autofit(my_flextable)
# Display the flextable
my_flextableChromosome | Window_Start | SNPs |
|---|---|---|
1 | 0 | AX-583054970 |
1 | 10,000,000 | AX-583095890 |
1 | 90,000,000 | AX-583324654 |
1 | 120,000,000 | AX-583423493, AX-583426050 |
1 | 130,000,000 | AX-583467551, AX-583504302 |
1 | 140,000,000 | AX-583514148, AX-583518586, AX-583518994, AX-583516491, AX-583517344, AX-583517999, AX-583521326, AX-583531703 |
1 | 170,000,000 | AX-583631472 |
1 | 220,000,000 | AX-583750740, AX-583750871 |
1 | 260,000,000 | AX-583882507, AX-583878485 |
1 | 270,000,000 | AX-583905588, AX-583924569, AX-583924978 |
1 | 300,000,000 | AX-583972796 |
1 | 340,000,000 | AX-584133664 |
2 | 20,000,000 | AX-584399488, AX-584419201 |
2 | 30,000,000 | AX-584429924, AX-584444558, AX-584453518 |
2 | 40,000,000 | AX-584473131 |
2 | 50,000,000 | AX-585010439, AX-584502184, AX-585023213 |
2 | 70,000,000 | AX-585042322 |
2 | 80,000,000 | AX-584546335 |
2 | 110,000,000 | AX-585101539 |
2 | 120,000,000 | AX-585110132 |
2 | 170,000,000 | AX-585182739 |
2 | 180,000,000 | AX-585190366, AX-585196879 |
2 | 190,000,000 | AX-582457468, AX-582465641 |
2 | 200,000,000 | AX-582494788, AX-582519052, AX-582532563, AX-582535272 |
2 | 260,000,000 | AX-584765066 |
2 | 320,000,000 | AX-585413917, AX-584907866 |
2 | 350,000,000 | AX-584951049 |
2 | 360,000,000 | AX-579474142, AX-579474650 |
2 | 370,000,000 | AX-579509845 |
2 | 380,000,000 | AX-579548089 |
2 | 390,000,000 | AX-579556477, AX-579560686, AX-579564292, AX-579565469, AX-579580051 |
2 | 400,000,000 | AX-579596233, AX-579602153, AX-579603553, AX-579606795, AX-579618571, AX-579619995, AX-579620627 |
2 | 410,000,000 | AX-579630462, AX-579632983, AX-579632074, AX-579636106, AX-579638540, AX-579640459, AX-579646083, AX-579661979, AX-579662371, AX-579667335 |
2 | 420,000,000 | AX-579693902, AX-579697016 |
2 | 450,000,000 | AX-579768613, AX-579778684 |
2 | 460,000,000 | AX-579808128 |
2 | 490,000,000 | AX-579909895 |
2 | 500,000,000 | AX-579940789 |
2 | 510,000,000 | AX-579955661, AX-579982534 |
2 | 520,000,000 | AX-580001880, AX-580026657, AX-580027537, AX-580042929 |
2 | 540,000,000 | AX-580099245 |
2 | 550,000,000 | AX-580165355 |
2 | 560,000,000 | AX-580192850, AX-580195612 |
2 | 570,000,000 | AX-580238721 |
2 | 580,000,000 | AX-580318010, AX-580321888 |
3 | 0 | AX-580342672, AX-580344997 |
3 | 10,000,000 | AX-580398903 |
3 | 20,000,000 | AX-580425125 |
3 | 40,000,000 | AX-580560428 |
3 | 50,000,000 | AX-580575055 |
3 | 70,000,000 | AX-580620056 |
3 | 80,000,000 | AX-580673467 |
3 | 90,000,000 | AX-580719743 |
3 | 130,000,000 | AX-580917486, AX-580925458, AX-580926966 |
3 | 150,000,000 | AX-581030099 |
3 | 180,000,000 | AX-581275893 |
3 | 190,000,000 | AX-581298415, AX-581302704, AX-581302901 |
3 | 200,000,000 | AX-581399096 |
3 | 210,000,000 | AX-581408938, AX-581413330, AX-581428225, AX-581442470, AX-581458697, AX-581461793, AX-581462648, AX-581467653, AX-581470736 |
3 | 220,000,000 | AX-581485809, AX-581488185, AX-581504582 |
3 | 230,000,000 | AX-581528715, AX-581543825, AX-581572119 |
3 | 280,000,000 | AX-581761610 |
3 | 290,000,000 | AX-581810815 |
3 | 320,000,000 | AX-581929530 |
3 | 350,000,000 | AX-582085824 |
3 | 380,000,000 | AX-582237115 |
3 | 440,000,000 | AX-582822062, AX-582853985 |
3 | 450,000,000 | AX-582884221 |
3 | 480,000,000 | AX-583025646 |